{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Import dataset \n",
    "Importing the iris dataset and then partitioning it into test and training dataset\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "150\n"
     ]
    }
   ],
   "source": [
    "from sklearn import datasets\n",
    "iris = datasets.load_iris()\n",
    "\n",
    "#print(iris)\n",
    "# X = inputs for the classifier\n",
    "X = iris.data\n",
    "\n",
    "# y = ouput \n",
    "y = iris.target\n",
    "print(y.size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\amanullahtariq\\Anaconda3\\lib\\site-packages\\sklearn\\cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
      "  \"This module will be removed in 0.20.\", DeprecationWarning)\n"
     ]
    }
   ],
   "source": [
    "# We can either manually partition dataset into test and training dataset or either use cross validation\n",
    "from sklearn.cross_validation import train_test_split\n",
    "\n",
    "#help(train_test_split)\n",
    "# Using half of the dataset for testing\n",
    "X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.5)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## K-Neighbors Classifier\n",
    "Creating custom KNN classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "ename": "SyntaxError",
     "evalue": "'return' outside function (<ipython-input-15-0e769b65830b>, line 31)",
     "output_type": "error",
     "traceback": [
      "\u001b[0;36m  File \u001b[0;32m\"<ipython-input-15-0e769b65830b>\"\u001b[0;36m, line \u001b[0;32m31\u001b[0m\n\u001b[0;31m    return self.y_tain[best_index]\u001b[0m\n\u001b[0m    ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m 'return' outside function\n"
     ]
    }
   ],
   "source": [
    "import random\n",
    "from scipy.spatial import distance\n",
    "#help (distance)\n",
    "\n",
    "def euc(point1 , point2):\n",
    "    return distance.euclidean(point1, point2)\n",
    "\n",
    "class KNNClassifier():\n",
    "    def fit(self,X_train, y_train):\n",
    "        self.X_train = X_train\n",
    "        self.y_train = y_train\n",
    "    \n",
    "     def closest(self,row):\n",
    "        best_index = 0\n",
    "        best_dist = euc(row,self.X_train[0])\n",
    "        \n",
    "        for i in range(1, len(self.X_train)):\n",
    "            dist = euc(row,self.X_train[i])\n",
    "            if best_dist > dist:\n",
    "                best_dist = dist\n",
    "                best_index = i\n",
    "        \n",
    "        return self.y_tain[best_index]\n",
    "    \n",
    "    def predict(self,X_test):\n",
    "        predictions = []\n",
    "        for row in X_test:\n",
    "            label = closest(row)\n",
    "            predictions.append(label)\n",
    "        \n",
    "        return predictions\n",
    "    \n",
    "   \n",
    "    \n",
    "    \n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'closest' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-14-639a8e459627>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0mmy_classifier\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mpredictions\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmy_classifier\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      6\u001b[0m \u001b[0mprint\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mpredictions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m<ipython-input-13-b531a54b3300>\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, X_test)\u001b[0m\n\u001b[1;32m     14\u001b[0m         \u001b[0mpredictions\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m     15\u001b[0m         \u001b[1;32mfor\u001b[0m \u001b[0mrow\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mX_test\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m---> 16\u001b[0;31m             \u001b[0mlabel\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mclosest\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     17\u001b[0m             \u001b[0mpredictions\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlabel\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m     18\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mNameError\u001b[0m: name 'closest' is not defined"
     ]
    }
   ],
   "source": [
    "#from sklearn.neighbors import KNeighborsClassifier\n",
    "my_classifier = KNNClassifier()\n",
    "my_classifier.fit(X_train, y_train)\n",
    "\n",
    "predictions = my_classifier.predict(X_test)\n",
    "print (predictions)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Checking accuracy of the classifier\n",
    "from sklearn.metrics import accuracy_score\n",
    "accuracy_score(y_test,predictions)"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda root]",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
